## Ramsay 2011 IO

library(foreign) 
library(Amelia)

## Load the original replication dataset and take a first look at it
r2011 <- read.dta("R2011 IO Rep Data.dta")
head(r2011)
dim(r2011)

## Drop ID vars
## (assigning NULL through [[<- removes the column; absent columns are a no-op)
for (id_var in c("country", "country_name")) {
    r2011[[id_var]] <- NULL
}

## Drop derived dummy variables: yr1, ..., yr38 and coldwar
for (dummy_var in c(paste0("yr", 1:38), "coldwar")) {
    r2011[[dummy_var]] <- NULL
}

## How many variables are left? 23: no reduction necessary
dim(r2011)

## Imputation

## What is the average percentage of missing data?
## Count the missing values in each column of `x`.
##
## x: a data frame or matrix.
## Returns an unnamed integer vector holding one NA count per column, in
## column order.
##
## colSums(is.na(x)) replaces apply(x, 2, ...): it avoids coercing a data
## frame to a (possibly character) matrix, and input with zero rows yields
## one zero per column instead of an empty vector.
NAs <- function(x) {
    as.integer(colSums(is.na(x)))
}
## Number of missing values per variable
NAs(r2011)
## Mean share of missing values across variables, expressed in percent
100 * mean(NAs(r2011) / nrow(r2011))

## Thus: 8 imputations

## Fix the RNG seed so the imputation run is reproducible
## (R reads the literal 02138 as the number 2138)
set.seed(02138)

## Run Amelia with m = 8 imputed datasets; "year" is passed as the
## time-series index and "ccode" as the cross-section index, together with a
## polytime setting of 3, lags of three variables, and an empirical prior
## equal to 1% of the number of rows.
r2011.out <- amelia(
    r2011,
    m = 8,
    ts = "year",
    cs = "ccode",
    polytime = 3,
    lags = c("normpolity2", "logoilrevpc", "logORDdef2k"),
    empri = 0.01 * nrow(r2011)
)

## Write the imputations to a single Stata file (separate = FALSE).
## NOTE(review): file.stem already ends in ".dta" and format = "dta" appears
## to append its own extension, so the file on disk is presumably named
## "R2011 IO Imp Data.dta.dta" -- confirm before changing, since downstream
## code may rely on the current name.
write.amelia(
    obj = r2011.out,
    separate = FALSE,
    file.stem = "R2011 IO Imp Data.dta",
    format = "dta"
)